# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)

# from:
#   https://www.youtube.com/watch?v=7oz1qGClrl0
#   https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-07-21

library(tidyverse)
#> Registered S3 methods overwritten by 'dbplyr':
#>   method         from
#>   print.tbl_lazy
#>   print.tbl_sql
#> ── Attaching packages ── tidyverse 1.3.1 ──
#> ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
#> ✔ tibble  3.1.7     ✔ dplyr   1.0.9
#> ✔ tidyr   1.2.0     ✔ stringr 1.4.0
#> ✔ readr   2.1.2     ✔ forcats 0.5.1
#> ── Conflicts ── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
# Load the three TidyTuesday (2020-07-21) data sets straight from GitHub.
# `show_col_types = FALSE` silences readr's column-specification message; the
# shape and column types that message reported are kept in the comments.

# 664 rows x 12 columns
#   chr (2):  animal_type, outcome
#   dbl (10): year, ACT, NSW, NT, QLD, SA, TAS, VIC, WA, Total
animal_outcomes <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_outcomes.csv",
  show_col_types = FALSE
)

# 42413 rows x 5 columns
#   chr (5): Animal Type, Complaint Type, Date Received, Suburb,
#            Electoral Division
animal_complaints <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_complaints.csv",
  show_col_types = FALSE
)

# 31330 rows x 7 columns
#   chr (7): nature, animal_type, category, suburb, date_range,
#            responsible_office, city
brisbane_complaints <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/brisbane_complaints.csv",
  show_col_types = FALSE
)
# First look at the outcomes table.
head(animal_outcomes)

animal_outcomes %>% glimpse()
#> Rows: 664, Columns: 12
#> year <dbl>, animal_type <chr>, outcome <chr>, followed by one <dbl> count
#> column each for ACT, NSW, NT, QLD, SA, TAS, VIC, WA and Total.
#> The first rows are year 1999, animal types "Dogs", "Cats", "Horses" and
#> outcomes "Reclaimed", "Rehomed", "Other", "Euthanized".

# Number of rows per animal type and per outcome.
animal_outcomes %>% count(animal_type)
animal_outcomes %>% count(outcome)

# First look at the complaints table.
animal_complaints %>% head()

animal_complaints %>% glimpse()
#> Rows: 42,413, Columns: 5 (all <chr>)
#> `Animal Type`, `Complaint Type`, `Date Received`, Suburb,
#> `Electoral Division`
#> `Date Received` is stored as text such as "June 2020".
# Number of distinct complaint types.
# Calling n_distinct() on the whole data frame together with the string
# "Complaint Type" counts distinct *rows* (that call printed 16667), because
# the string is treated as one more constant column. Pull the column out
# first so that its values are what gets counted.
animal_complaints %>%
  pull(`Complaint Type`) %>%
  n_distinct()
# Distinct values in every column.
animal_complaints %>%
  summarise(across(everything(), n_distinct))

# Complaints per complaint type: as a table ...
animal_complaints %>% count(`Complaint Type`)

# ... and as a bar chart.
animal_complaints %>%
  count(`Complaint Type`) %>%
  ggplot(aes(x = `Complaint Type`, y = n)) +
  geom_col()

# Store every character column as a factor.
animal_complaints <- animal_complaints %>%
  mutate(across(where(is.character), as.factor))

str(animal_complaints)
#> tibble [42,413 x 5]; factor levels after the conversion:
#>   Animal Type 2 ("cat", "dog"), Complaint Type 6, Date Received 81,
#>   Suburb 85, Electoral Division 11
# Stacked bar charts of complaint counts.
# count() stands in for select() + group_by() + summarise(n()). It returns an
# ungrouped table, so the "`summarise()` has grouped output by ..." message
# printed by the longer form no longer appears.

# Complaints per electoral division, split by complaint type.
animal_complaints %>%
  count(`Electoral Division`, `Complaint Type`, name = "counts") %>%
  ggplot(aes(`Electoral Division`, counts, fill = `Complaint Type`)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# Complaints per electoral division, split by animal type.
animal_complaints %>%
  count(`Electoral Division`, `Animal Type`, name = "counts") %>%
  ggplot(aes(`Electoral Division`, counts, fill = `Animal Type`)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))

# Complaints per complaint type, split by animal type.
animal_complaints %>%
  count(`Complaint Type`, `Animal Type`, name = "counts") %>%
  ggplot(aes(`Complaint Type`, counts, fill = `Animal Type`)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90))
# Normalise the column names: lower-case them and turn spaces into
# underscores. str_replace_all() converts every space in a name, whereas
# str_replace() would stop after the first one.
animal_complaints <- animal_complaints %>%
  rename_with(function(.x) {
    .x %>%
      tolower() %>%
      str_replace_all(pattern = " ", replacement = "_")
  })

animal_complaints %>% head()
animal_outcomes %>% head()

# Express `var` as a fraction of `total` (element-wise division).
#   var:     numeric vector of counts
#   total:   numeric vector (or scalar) to divide by
#   returns: var / total
convert_to_frac <- function(var, total) {
  var / total
}
# Turn the counts in columns ACT through WA into fractions of Total.
animal_outcomes %>%
  mutate(across(ACT:WA, ~ convert_to_frac(var = .x, total = Total)))

# The same conversion wrapped in a function so it can be reused.
#   df:      data frame with the columns ACT..WA and Total
#   returns: df with ACT..WA divided by Total
convert_to_frac_df <- function(df) {
  df %>%
    mutate(across(ACT:WA, ~ convert_to_frac(var = .x, total = Total)))
}

# Both call styles give the same result.
convert_to_frac_df(animal_outcomes)
animal_outcomes %>% convert_to_frac_df()

# use . instead of df
# A pipeline that starts with `.` is a magrittr functional sequence, i.e. a
# function of its input, so no explicit `function(df)` wrapper is needed.
tiday_frac <- . %>%
  mutate(across(ACT:WA, ~ convert_to_frac(var = .x, total = Total)))

animal_outcomes %>% tiday_frac()

# Horizontal bar chart of the number of rows per outcome, ordered by count.
animal_outcomes %>%
  count(outcome) %>%
  mutate(outcome = reorder(outcome, n)) %>%
  ggplot(aes(x = outcome, y = n, fill = outcome)) +
  geom_col() +
  theme_bw() +
  coord_flip()

# The chart above, generalised to any column.
#   df:      data frame
#   var:     unquoted name of the column to tabulate
#   returns: a ggplot object (horizontal bars, one per value of `var`)
factors_bar_chart <- function(df, var) {
  df %>%
    # count() only keeps the counted column, so no select() is needed first.
    count({{ var }}) %>%
    mutate("{{ var }}" := reorder({{ var }}, n)) %>%
    ggplot(aes(x = {{ var }}, y = n, fill = {{ var }})) +
    geom_col() +
    theme_bw() +
    coord_flip()
}
factors_bar_chart(animal_outcomes, outcome)
factors_bar_chart(animal_outcomes, animal_type)

# First look at the Brisbane complaints table.
brisbane_complaints %>% glimpse()
#> Rows: 31,330, Columns: 7 (all <chr>)
#> nature, animal_type, category, suburb, date_range, responsible_office, city
#> In the first rows animal_type holds "Dog" as well as "Attack", date_range
#> holds file names such as "1st-quarter-2016-17.csv", and
#> responsible_office is NA.
# Distinct values in every column (NA counts as a value of its own).
brisbane_complaints %>% map_dbl(n_distinct)
#> nature  animal_type  category  suburb  date_range  responsible_office  city
#>      1            5        24     192          17                   9     1
# Store every character column as a factor.
brisbane_complaints <- brisbane_complaints %>%
  mutate(across(where(is.character), as.factor))

str(brisbane_complaints)
#> tibble [31,330 x 7]; factor levels after the conversion:
#>   nature 1, animal_type 5, category 23, suburb 191, date_range 17,
#>   responsible_office 8, city 1
#> category and responsible_office contain NA values.
# Complaint categories recorded for Sunnybank, ordered by count.
# drop_na() removes the row that count() creates for a missing category.
brisbane_complaints %>%
  filter(suburb == "SUNNYBANK") %>%
  count(category) %>%
  drop_na() %>%
  mutate(category = reorder(category, n)) %>%
  ggplot(aes(x = category, y = n, fill = category)) +
  geom_col() +
  coord_flip() +
  theme_bw()

# The same chart restricted to rows whose animal_type is "Attack".
brisbane_complaints %>%
  filter(
    suburb == "SUNNYBANK",
    animal_type == "Attack"
  ) %>%
  count(category) %>%
  drop_na() %>%
  mutate(category = reorder(category, n)) %>%
  ggplot(aes(x = category, y = n, fill = category)) +
  geom_col() +
  coord_flip() +
  theme_bw()

# Attack counts for every suburb / category pair (table only, no chart).
brisbane_complaints %>%
  filter(animal_type == "Attack") %>%
  count(suburb, category) %>%
  drop_na() %>%
  mutate(category = reorder(category, n))
# Draw the attack-category bar chart for one suburb and save it as a PDF in
# the working directory.
#   df:       data frame with the columns `category` and `n`
#   filename: suburb label; used in the chart title and as the file name
#             (".pdf" is appended)
#   returns:  the result of ggsave()
save_charts_func <- function(df, filename) {
  temp_chart <- df %>%
    mutate(category = reorder(category, n)) %>%
    ggplot(aes(x = category, y = n, fill = category)) +
    geom_col() +
    coord_flip() +
    theme_bw() +
    # paste() puts a space between the label and "Attacks"; paste0() glued
    # them together (e.g. "SUNNYBANKAttacks").
    ggtitle(paste(filename, "Attacks"))

  ggsave(
    filename = paste0(filename, ".pdf"),
    plot = temp_chart,
    width = 11, height = 8.5, units = "in"
  )
}

# One row per suburb, with its category counts nested in a `data` column.
# The nested column is named explicitly; the unnamed form `nest(-suburb)` is
# deprecated in tidyr.
brisbane_complaints %>%
  filter(animal_type == "Attack") %>%
  count(suburb, category) %>%
  drop_na() %>%
  nest(data = -suburb)
library(magrittr)
#> Attaching package: ‘magrittr’
#> The following object is masked from ‘package:purrr’:
#>     set_names
#> The following object is masked from ‘package:tidyr’:
#>     extract
# Save one PDF per suburb. magrittr's `%$%` exposes the columns of the nested
# tibble so that `data` and `suburb` can be passed to walk2() by name.
brisbane_complaints %>%
  filter(animal_type == "Attack") %>%
  count(suburb, category) %>%
  drop_na() %>%
  nest(data = -suburb) %>%
  # str_replace_all() replaces every space, so suburb names with more than
  # one space also become clean file names.
  mutate(suburb = str_replace_all(suburb, " ", "_")) %$%
  walk2(.x = data, .y = suburb, .f = save_charts_func)

# Another way of saving charts
# from: https://youtu.be/GxvccD8K49M?t=3262
# (about functional programming with the purrr package)
# dir.create("charts_images")
# Draw the attack-category bar chart for one suburb and save it as a PNG in
# the "charts_images" folder.
#   df:       data frame with the columns `category` and `n`
#   filename: suburb label; used in the chart title and as the file name
#             (".png" is appended)
#   returns:  the result of ggsave()
save_charts_func2 <- function(df, filename) {
  out_dir <- "charts_images"
  # Create the output folder on first use instead of relying on a manual
  # dir.create() call having been run beforehand.
  if (!dir.exists(out_dir)) {
    dir.create(out_dir)
  }

  temp_chart <- df %>%
    mutate(category = reorder(category, n)) %>%
    ggplot(aes(x = category, y = n, fill = category)) +
    geom_col() +
    coord_flip() +
    theme_bw() +
    # paste() puts a space between the label and "Attacks".
    ggtitle(paste(filename, "Attacks"))

  ggsave(
    filename = file.path(out_dir, paste0(filename, ".png")),
    plot = temp_chart,
    width = 11, height = 8.5, units = "in"
  )
}

# Save one PNG per suburb.
brisbane_complaints %>%
  filter(animal_type == "Attack") %>%
  count(suburb, category) %>%
  drop_na() %>%
  nest(data = -suburb) %>%
  # Replace every space so multi-word suburbs give clean file names.
  mutate(suburb = str_replace_all(suburb, " ", "_")) %$%
  walk2(.x = data, .y = suburb, .f = save_charts_func2)

# from: https://youtu.be/GxvccD8K49M?t=2832
# install.packages("NHSRdatasets")
library(NHSRdatasets)

ae_attendances %>% head()

# Only organisations whose code starts with "R".
ae_attendances %>%
  filter(org_code %>% str_starts("R")) %>%
  head()

# Total attendances per organisation and period.
# `.groups = "drop_last"` is what summarise() does by default (the result
# stays grouped by org_code); stating it silences the "`summarise()` has
# grouped output by 'org_code'" message.
ae_attendances %>%
  filter(org_code %>% str_starts("R")) %>%
  group_by(org_code, period) %>%
  summarise(attendances = sum(attendances), .groups = "drop_last")

# nest() on the grouped result gives one row per org_code, with that
# organisation's periods and attendances in a `data` list-column.
ae_attendances %>%
  filter(org_code %>% str_starts("R")) %>%
  group_by(org_code, period) %>%
  summarise(attendances = sum(attendances), .groups = "drop_last") %>%
  nest()

# Same pipeline with the filter written without the inner pipe.
ae_attendances %>%
  filter(str_starts(org_code, "R")) %>%
  group_by(org_code, period) %>%
  summarise(attendances = sum(attendances), .groups = "drop_last") %>%
  nest()

ae_attendances %>%
  filter(org_code %>% str_starts("R")) %>%
  group_by(org_code, period) %>%
  summarise(attendances = sum(attendances), .groups = "drop_last") %>%
  nest()

# Inspect the nested table of the first organisation from the result above.
.Last.value$data[[1]]

# Keep only organisations whose nested table has exactly 36 rows
# (one row per period after the summarise).
ae_attendances %>%
  filter(org_code %>% str_starts("R")) %>%
  group_by(org_code, period) %>%
  summarise(attendances = sum(attendances), .groups = "drop_last") %>%
  nest() %>%
  filter(map_dbl(data, nrow) == 36)
# Line-and-point chart of attendances over time for one organisation.
#   org_code: value used as the plot title
#   data:     data frame with the columns `period` and `attendances`
#   returns:  a ggplot object
plot_fn <- function(org_code, data) {
  # Vertical x-axis labels; added after theme_bw() so it is not overridden.
  rotated_x_labels <- theme(axis.text.x = element_text(angle = 90))

  ggplot(data, aes(period, attendances)) +
    geom_line() +
    geom_point() +
    labs(title = org_code) +
    theme_bw() +
    rotated_x_labels
}
# Add a list-column holding one chart per organisation.
# `.groups = "drop_last"` is summarise()'s default here, stated explicitly so
# the "grouped output by 'org_code'" message is not printed.
ae_attendances %>%
  filter(org_code %>% str_starts("R")) %>%
  group_by(org_code, period) %>%
  summarise(attendances = sum(attendances), .groups = "drop_last") %>%
  nest() %>%
  # keep organisations with exactly 36 summarised rows
  filter(map_dbl(data, nrow) == 36) %>%
  mutate(plot = map2(.x = org_code, .y = data, .f = plot_fn))
# dir.create("nhsr_charts")

# As above, plus the path each chart will be saved to.
# `.groups = "drop_last"` is summarise()'s default here, stated explicitly so
# the "grouped output by 'org_code'" message is not printed.
ae_attendances %>%
  filter(org_code %>% str_starts("R")) %>%
  group_by(org_code, period) %>%
  summarise(attendances = sum(attendances), .groups = "drop_last") %>%
  nest() %>%
  filter(map_dbl(data, nrow) == 36) %>%
  # creating plot
  mutate(plot = map2(.x = org_code, .y = data, .f = plot_fn)) %>%
  # creating file names
  mutate(filename = paste0("nhsr_charts/", org_code, ".png"))
# Make sure the output folder exists before anything is written to it.
if (!dir.exists("nhsr_charts")) {
  dir.create("nhsr_charts")
}

# Build the charts and save the first ten with pwalk().
# `.groups = "drop_last"` is summarise()'s default here, stated explicitly so
# the "grouped output by 'org_code'" message is not printed.
ae_attendances %>%
  filter(org_code %>% str_starts("R")) %>%
  group_by(org_code, period) %>%
  summarise(attendances = sum(attendances), .groups = "drop_last") %>%
  nest() %>%
  filter(map_dbl(data, nrow) == 36) %>%
  # creating plot
  mutate(plot = map2(.x = org_code, .y = data, .f = plot_fn)) %>%
  # creating file names
  mutate(filename = paste0("nhsr_charts/", org_code, ".png")) %>%
  ungroup() %>%
  # only the first ten organisations
  head(10) %>%
  # keep just the columns whose names match ggsave() arguments
  select(plot, filename) %>%
  # saving plots: pwalk() passes each row to ggsave() as plot = , filename =
  pwalk(ggsave)
#> Saving 7 x 7 in image   (printed once for each of the 10 files)
# Make sure the output folder exists before anything is written to it.
if (!dir.exists("nhsr_charts2")) {
  dir.create("nhsr_charts2")
}

library(magrittr)

# Same export as with pwalk(), written with `%$%` and walk2().
# `.groups = "drop_last"` is summarise()'s default here, stated explicitly so
# the "grouped output by 'org_code'" message is not printed.
ae_attendances %>%
  filter(org_code %>% str_starts("R")) %>%
  group_by(org_code, period) %>%
  summarise(attendances = sum(attendances), .groups = "drop_last") %>%
  nest() %>%
  filter(map_dbl(data, nrow) == 36) %>%
  # creating plot
  mutate(plot_var = map2(.x = org_code, .y = data, .f = plot_fn)) %>%
  # creating file names
  mutate(filename = paste0("nhsr_charts2/", org_code, ".png")) %>%
  ungroup() %>%
  # only the first ten organisations
  head(10) %>%
  # `%$%` exposes the two columns by name to the walk2() call
  select(plot_var, filename) %$%
  # saving plots: ggsave(filename, plot) is called positionally per pair
  walk2(.x = filename, .y = plot_var, .f = ggsave)
#> Saving 7 x 7 in image   (printed once for each of the 10 files)